data_sumtable <-
  readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ0_table")
data_acc <-
  readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ1_acc")
data_imp_features <-
  readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ2_Important_features", na = "NA")
data_dimreduction <-
  readxl::read_excel(path = "Summary_TR_predictions_with_rs.xlsx", sheet = "RQ3_reducing")
data_PROBAST <- readxl::read_excel(path = "PROBAST_assessment.xlsx", sheet = "Main")
colnames(data_PROBAST) <- data_PROBAST[1,]
data_PROBAST <- data_PROBAST[2:14,]

# Replace spaces and hyphens in column names with _
colnames(data_acc) <- gsub(" ","_",colnames(data_acc))
colnames(data_imp_features) <- gsub(" - ","_",colnames(data_imp_features))
colnames(data_imp_features) <- gsub(" ","_",colnames(data_imp_features))
colnames(data_dimreduction) <- gsub(" ","_",colnames(data_dimreduction))

1 RQ 0: Preprocessing of summary table: compress table entries, delete columns, change column names

## Step 1: Modify table entries
# Column: Responders/nonresponders
data_sumtable$`Responders/nonresponders` <- gsub(" responders, ","/",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" nonresponders","",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" remitters, ","/",data_sumtable$`Responders/nonresponders`)
data_sumtable$`Responders/nonresponders` <- gsub(" nonremitters","",data_sumtable$`Responders/nonresponders`)
# Add proportion of nonresponders
# proportion_nonresponders <- character(nrow(data_sumtable))
# row_idx = 8
# for (row_idx in 1:nrow(data_sumtable)){
#   strsplit(data_sumtable$`Responders/nonresponders`[row_idx], "\n")
#   if
#   entry
#   entry <- data_sumtable$`Responders/nonresponders`[row_idx]
#   responders <- strsplit(data_sumtable$`Responders/nonresponders`[row_idx], c("; \r\n"))[[1]][1]
#   nonresponders <- strsplit(entry, "/")[[1]][2]
#   proportion <- round(as.numeric(nonresponders)/(as.numeric(responders)+as.numeric(nonresponders))*100,0)
#   entry <- paste(data_sumtable$`Responders/nonresponders`[row_idx]," (",proportion,"%)", sep = "")
# }



# Column: Treatment
data_sumtable$Treatment <- gsub("atypical antipsychotics","AAP",data_sumtable$Treatment)
#data_sumtable$treatment <- gsub("Alpha2-receptor-antagonists","AAP",data_sumtable$treatment)

# Column: Information on models tested
data_sumtable$`Information on models tested` <- gsub(" of models tested","",data_sumtable$`Information on models tested`)

# Column: Definition of treatment outcome
data_sumtable$`Definition of treatment outcome` <- gsub("\\s*\\([^\\)]+\\)","",data_sumtable$`Definition of treatment outcome`)

# Column: Input features
data_sumtable$`Type of functional-connectivity-based input features` <- gsub("[(]wb:.*)","",data_sumtable$`Type of functional-connectivity-based input features`)

# Column: FC estimation
data_sumtable$`Way of estimating the underlying functional connectivities`<- gsub("Group-information guided","Gig",data_sumtable$`Way of estimating the underlying functional connectivities`)

# Column: Algorithms
data_sumtable$`Algorithm(s) of the final classifier(s)`<- gsub("graph convolutional network","GCN",data_sumtable$`Algorithm(s) of the final classifier(s)`)

## Step 2: delete columns (Age group and Year)
data_sumtable$`Age group`<- NULL
data_sumtable$Year <- NULL

## Step 3: change column names
colnames(data_sumtable) <- gsub("Sample size","N",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Accuracy_rounded","Best Acc",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Responders/nonresponders","Responders/ nonresponders",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Definition of treatment outcome","Definition treatment outcome",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Type of functional-connectivity-based input features","Input features",colnames(data_sumtable))
colnames(data_sumtable) <- gsub("Way of estimating the underlying functional connectivities","Estimating FCs",colnames(data_sumtable))

## Step 4: turn values of numeric variables (N and Best ACC) into strings
data_sumtable$N <- as.character(data_sumtable$N)
data_sumtable$`Best Acc` <- as.character(data_sumtable$`Best Acc`)

1.1 Create flextable of summary table for publication

# More info about settings in flextable: https://ardata-fr.github.io/flextable-book/cell-content.html

# Set defaults
set_flextable_defaults(font.family = "Arial",
font.size = 8,
padding.bottom = 3,
padding.top = 3,
padding.left = 0.5,
paddings.right = 0.5,
#theme_fun = "theme_apa",
theme_fun = NULL,
text.align = "left",
line_spacing = 1.5)

# Initialize flex_table
apa_sumtable <- flextable(data_sumtable)

# Set table properties
apa_sumtable <- set_table_properties(apa_sumtable, width = 1, layout = "autofit")

# Save flextable to word
margins <- page_mar(
  bottom = 0.5,
  top = 0.5,
  right = 0.5,
  left = 0.5,
  header = 0.5,
  footer = 0.5,
  gutter = 0.5
)
format_table <- prop_section(
  page_size = page_size(orient = "landscape"),
  page_margins = margins)

flextable::save_as_docx(apa_sumtable, path = "plots/table_1_extraction.docx", pr_section = format_table)

data_sumtable

2 RQ 1: How well can treatment outcome in internalizing disorders be predicted by features based on resting-state functional connectivities?

2.1 Calculate reported and balanced accuracy of the best model

The mean accuracy of the best model per study is 80%, with a range of 61% - 90%. The “balanced mean accuracy” is 76%, with a range of 58% - 89%.

2.2 Plot reported and balanced accuracy without (1) and with risk of data leakage (2)

# Add column for ROB to data_acc using information from 4.8. in data_PROBAST
studies_low_ROB <- c(data_PROBAST[data_PROBAST$`4.8 Were model overfitting and optimism in model performance accounted for?` == "Y",c("Study")])$Study
data_acc$ROB <- ifelse(data_acc$Study %in% studies_low_ROB, "low risk of data leakage", "high risk of data leakage")

# Bring data_acc into longformat
data_acc_long <- reshape(data = data_acc, idvar = "Study", varying = c("Accuracy_rounded","Accuracy_controlled"), v.name = "metric", times = c("Accuracy_rounded","Accuracy_controlled"), new.row.names = 1:1000, direction = "long")

data_acc_long$time <- as.factor(data_acc_long$time)
data_acc_long$time <- factor(data_acc_long$time, levels = c("Accuracy_rounded", "Accuracy_controlled"))

# Plot data without risk of bias 
labels_acc_germ <- c("berichtete Genauigkeit","für Zufallsniveau\nkontrollierte Genauigkeit")
labels_acc_eng <- c("reported\naccuracy","balanced\naccuracy")
plot_acc_contr_violin <- ggplot(data = data_acc_long, aes(y =`metric`, x = `time`))+
  geom_violin() +
  geom_quasirandom(size = size_dots) +
  stat_summary(
    geom = "point",
    fun.y = "mean",
    col = "black",
    size = size_dots,
    shape = 24,
    fill = "red"
  )+
  labs(x="", y = "Accuracy in %")+
  scale_x_discrete(labels = labels_acc_eng)

# Plot data with risk of bias
plot_acc_contr_violin_rob <- ggplot(data = data_acc_long, aes(y =`metric`, x = `time`))+
  geom_violin() +
  geom_quasirandom(size = size_dots-0.5, aes(colour = `ROB`)) +
  scale_colour_manual(name = "Risk of bias", values=c("grey69","black"))+
  stat_summary(
    geom = "point",
    fun.y = "mean",
    col = "black",
    size = size_dots -0.5,
    shape = 24,
    fill = "red"
  )+
  labs(x="", y = "Accuracy in %")+
      theme(legend.position = "bottom")+
  theme(legend.title=element_blank()) +
  scale_x_discrete(labels = labels_acc_eng)+
    guides(color = guide_legend(nrow = 2, byrow = TRUE))

# Display and Save both plots
plot_acc_contr_violin

plot_acc_contr_violin_rob

ggsave("plots/violin_acc_normal_and_controlled.svg", plot_acc_contr_violin)
ggsave("plots/violin_acc_normal_and_controlled_rob.svg",plot_acc_contr_violin_rob, height = 4)

2.3 Plot sample size and balanced accuracy for each treatment type

The mean sample size was N = 75, with a range of [18, 144].

# Add column for type of treatment manually to data_acc
data_acc$Treatment <- c("rTMS","medication","rTMS","medication","ECT","medication","medication and psychotherapy","ECT","medication","ECT","medication","psychotherapy","psychotherapy")

# Plot settings
size_half = size_dots * 1.5
pos_half_vert = 0.4
pos_half_horiz = 0.2

# Plot sample size and balanced accuracy
plot_acc_contr_sample_size <- ggplot(data = data_acc, aes(y =`Accuracy_controlled`, x = `Sample_size`))+
    theme(text = element_text(family = "Arial"))+
  geom_point(aes(color = `Treatment`), size = size_dots)+ #draw point for all treatments
  geom_text(data = data_acc[data_acc$Treatment == "medication and psychotherapy",],
            label = "\u25D7", 
            aes(`Sample_size`, `Accuracy_controlled`,colour = "medication"),  
            size= size_half, 
            hjust = pos_half_horiz,
            vjust = pos_half_vert,
            family = "Lucida Sans Unicode",
            key_glyph = draw_key_blank)+
   geom_text(data = data_acc[data_acc$Treatment == "medication and psychotherapy",],
             label = "\u25D6", 
             aes(`Sample_size`, `Accuracy_controlled`,colour = "psychotherapy"),  
             size= size_half, 
             hjust = pos_half_horiz + 0.57,
             vjust= pos_half_vert,
             family = "Lucida Sans Unicode",
             key_glyph = draw_key_blank)+
  scale_color_manual(
    breaks = c("medication", "ECT","psychotherapy","rTMS"),
    values = c("ECT" = color_pal[1], "medication" = color_pal[2], "medication and psychotherapy" = "grey","psychotherapy" = color_pal[3], "rTMS" = color_pal[4])
  )+
  geom_smooth(method = "lm", color = "black", size = 0.5)+
  labs(y = "Balanced accuracy in %", x = "Sample size")+
  scale_x_continuous(limits=c(15,147), breaks = c(25,50,75,100,125,150)) +
    theme(legend.position = "bottom")+
  theme(legend.title=element_blank()) +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))

ggsave("plots/plot_samplesize.svg",plot_acc_contr_sample_size, height = 4)

2.4 Figure 2: Combine plots

# Combine both plots (Accuracy with risk of bias and Sample size and balanced accuracy)
plot_combined <- plot_acc_contr_violin_rob + plot_acc_contr_sample_size + plot_annotation(tag_levels = "A") + plot_layout(ncol = 2)

plot_combined

cairo_pdf("plots/figure_2_acc_sample_size.pdf", height = 3.5)
plot_combined
dev.off()
## png 
##   2
ggsave("plots/figure_2_acc_sample_size.svg",plot_combined, height = 3.5)

3 RQ 2.1: Which approaches are taken to draw inferences about the predictive value of specific features?

3.1 Remove studies that did not calculate feature importance

data_imp_features_clean <- data_imp_features[!(is.na(data_imp_features$Features_with_high_predictive_value)),]

12 out of 13 studies made a statement on feature importance.

3.2 Plot approaches used for calculation of feature importance

# Add new rows when two categories were chosen
data_imp_features_clean_way <- data_imp_features_clean

for (row_idx in 1:nrow(data_imp_features_clean)){
  entry <- data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[row_idx]
  if (grepl(";",entry)){
  categories <- strsplit(entry,"; ")[[1]]
  # Copy row
  data_imp_features_clean_way <- rbind(data_imp_features_clean_way,data_imp_features_clean_way[row_idx,])
  # Replace category in old row
   data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[row_idx]<-categories[1]
  # Replace category in new row
   data_imp_features_clean_way$Way_of_measuring_predictive_value_categories[nrow(data_imp_features_clean_way)]<-categories[2]
  }
}

# Plot data
plot_measure_imp <- ggplot2::ggplot(data = data_imp_features_clean_way,aes(x = `Way_of_measuring_predictive_value_categories`, color = `Study`))+
  geom_bar() +
  theme(legend.position = "none")+ 
  xlab("")+
  coord_flip()+
  ggtitle("Ways to measure high predictive value")
ggplotly(plot_measure_imp)

4 RQ 2.2: Which brain regions are important for the prediction of treatment outcome?

4.1 Bring data into long-format

# Get column names
cols_tested <- colnames(data_imp_features_clean )[endsWith(colnames(data_imp_features_clean ),"_tested")]
cols_important <- colnames(data_imp_features_clean )[endsWith(colnames(data_imp_features_clean ),"_important")]

# First: Reshape tested-columns
data_imp_features_clean$ID = rownames(data_imp_features_clean)
data_ROIs_tested_long <- reshape(data = data_imp_features_clean,idvar = "Study", new.row.names = 1:20000,
          varying = cols_tested, v.name = "tested", times = cols_tested, drop = cols_important, direction = "long")

colnames(data_ROIs_tested_long)[colnames(data_ROIs_tested_long)=="time"] <- 'ROI'
data_ROIs_tested_long$ROI <- gsub("_tested","",data_ROIs_tested_long$ROI)

data_ROIs_tested_long$ID <- paste(data_ROIs_tested_long$`Study`,data_ROIs_tested_long$ROI)

# Second: Reshape important-columns
data_ROIs_important_long <- reshape(data = data_imp_features_clean , idvar = "ID",new.row.names = 1:20000,
          varying = cols_important, v.name = "important", times = cols_important, direction = "long", drop = cols_tested)

data_ROIs_important_long$ROI <- gsub("_important","",data_ROIs_important_long$time)

data_ROIs_important_long$ID <- paste(data_ROIs_important_long$`Study`,data_ROIs_important_long$ROI)

# Third: Merge both data sets
data_ROIs <- merge(data_ROIs_tested_long,data_ROIs_important_long)

data_ROIs$time <- gsub("_important","",data_ROIs$time)
colnames(data_ROIs)[colnames(data_ROIs)=="time"] <- 'Region'

# Exclude ROIs, when they were not tested
data_ROIs_only_tested <- data_ROIs[!c(data_ROIs$tested == "n"|is.na(data_ROIs$tested)|data_ROIs$tested == "NA"),]

# Convert to factor
data_ROIs_only_tested$important <- as.factor(data_ROIs_only_tested$important)

4.2 Change brain regions names

data_ROIs_only_tested$Region <-
gsub("_"," ",data_ROIs_only_tested$Region)

4.3 Create (wide) data set with absolute and relative frequencies per region

# Wide data set with absolute and relative freq per region
freq_all <- as.data.frame(table(data_ROIs_only_tested$Region))

data_ROIs_only_tested_important <- data_ROIs_only_tested[data_ROIs_only_tested$important=="y",]
data_ROIs_only_tested_nonimportant <- data_ROIs_only_tested[data_ROIs_only_tested$important=="n",]
freq_important <- as.data.frame(table(data_ROIs_only_tested_important$Region))
colnames(freq_important) <- c("Var1","Abs_frequency")
freq_nonimportant <- as.data.frame(table(data_ROIs_only_tested_nonimportant$Region))
colnames(freq_nonimportant) <- c("Var1","Freq_nonimportant")

freq_all <- merge(freq_all,freq_important,by= "Var1")
freq_all <- merge(freq_all,freq_nonimportant,by= "Var1")
freq_all$Rel_frequency <- round((freq_all$Abs_frequency/freq_all$Freq)*100)

# Get features with relative frequencies above 30% and order them according to frequency
temp_dataset <- freq_all[freq_all$Rel_frequency>30, c("Var1","Rel_frequency")]
temp_dataset <- arrange(temp_dataset,Rel_frequency)
Regions_high_freq <- temp_dataset$Var1

data_ROIs_only_tested$Region <- factor(data_ROIs_only_tested$Region, levels = Regions_high_freq)

4.4 Figure 3: Plot feature importance (predictive value in absolute and relative frequencies)

This plot shows whose connectivities were particularily important for the prediction of TR.

# Reduce data set to only high frequency regions
data_ROIs_only_tested_high_freq <- data_ROIs_only_tested[data_ROIs_only_tested$Region %in% Regions_high_freq,]

plot_feat_pred_value <- ggplot2::ggplot(data = data_ROIs_only_tested_high_freq, aes(x =`Region`, fill= `important`))+
  geom_bar()+
  #theme(axis.text.x = element_text(angle = 90))+
  scale_fill_brewer(palette = "Oranges")+
  geom_text(aes(label = scales::percent(..count../tapply(..count.., ..x.. ,sum)[..x..])),stat = "count", position= position_stack(vjust = 0.5))+
  xlab("")+
  coord_flip()
#https://stackoverflow.com/questions/55680449/ggplot-filled-barplot-with-percentage-labels

# Change labels 
scaleFUN <- function(x) x*100

legend_predvalue_eng <- c("no predictive value", "predictive value")

# Plot
plot_feat_pred_value_2 <- ggplot(data = data_ROIs_only_tested_high_freq, aes(x =`Region`, fill= `important`))+
  geom_bar(aes (y = ..count../tapply(..count.., ..x.. ,sum)[..x..]))+
  #theme(axis.text.x = element_text(angle = 90))+
  scale_fill_brewer(palette = "Oranges", labels = legend_predvalue_eng)+
  scale_y_continuous(labels=scaleFUN)+
  geom_text(stat = "count", aes(label = paste0("n=",..count..)), position = position_fill(vjust =0.5))+
  xlab("")+
  ylab("Relative Frequency in %")+
  theme(legend.title=element_blank(),legend.position = "right") +
  #theme (plot.margin=unit (c (8,5.5,5.5,5.5),units = "pt"))+
  #scale_fill_manual()+
  coord_flip()

plot_feat_pred_value_2 

# Save plot
cairo_pdf("plots/figure_3_predictive_value.pdf", height = 3.5)
plot_feat_pred_value_2 
dev.off()
## png 
##   2
ggsave("plots/figure_3_predictive_value.svg",plot_feat_pred_value_2,height = 4)

5 RQ 3: Which approaches are taken to reduce the large amount of theoretically initially available functional connectivities to a small set of features to be used in the final classifier(s)?

# Bring data into longformat
cols_reduction <- colnames(data_dimreduction)[4:9]
data_dimreduction_long <- reshape(data = data_dimreduction, idvar = "Study",varying = cols_reduction, v.name = "applied", times = cols_reduction, new.row.names = 1:1000, direction = "long")

# Plot for feature generation
plot_reduction_generation <- ggplot(data = data_dimreduction_long[(!(is.na(data_dimreduction_long$applied))& data_dimreduction_long$time %in% c("atlas-based_parcellation", "data-driven_parcellation", "theory-based_ROI-/connectivity-selection")),],aes(x = `time`, color = `Study`))+
  geom_bar() +
  coord_flip() +
  xlab("")+
  theme(legend.position = "none", plot.title = element_text(size =14))+ 
  ggtitle("Feature generation")

ggplotly(plot_reduction_generation)
# Plot for feature extraction
cols_feat_extract <- colnames(data_dimreduction)[7:9]

plot_reduction_extraction <- ggplot2::ggplot(data = data_dimreduction_long[(!(is.na(data_dimreduction_long$applied))& data_dimreduction_long$time %in% cols_feat_extract),],aes(x = `time`, color = `Study`))+
  geom_bar() +
  theme(legend.position = "none", plot.title = element_text(size=14))+ 
  coord_flip() +
  xlab("")+
  ggtitle("Feature selection")
ggplotly(plot_reduction_extraction)

6 RQ 4: PROBAST rating

# Final rating columns
cols_final_rating <- c("Final rating domain 1", "Final rating domain 2", "Final rating domain 3", "Final rating domain 4", "Final rating")

data_PROBAST_plot <- data_PROBAST[c("Study",cols_final_rating)]

# Bring data into long-format
data_PROBAST_plot_long <- reshape(data = data_PROBAST_plot,idvar = "Study", new.row.names = 1:20000,varying = cols_final_rating, v.name = "ROB", times = cols_final_rating, direction = "long")
colnames(data_PROBAST_plot_long)[colnames(data_PROBAST_plot_long)=="time"] <- 'Rating_domain'

# Order and rename factor rating_domain
PROBAST_domains_eng <- c("ROB Sample","ROB Predictors", "ROB Outcome", "ROB Analysis", "ROB Total")
data_PROBAST_plot_long$Rating_domain <- factor(data_PROBAST_plot_long$Rating_domain, 
                                               levels = c("Final rating domain 1", "Final rating domain 2", "Final rating domain 3", "Final rating domain 4", "Final rating"), labels = PROBAST_domains_eng)

scaleFUN <- function(x) x*100

# Prepare final rating domains labels to make one label bold
breaks <- levels(data_PROBAST_plot_long$Rating_domain)
labels <- as.expression(breaks)
labels[[5]] <- bquote(bold(.(labels[[5]])))

# Plot data
legend_PROBAST_eng <- c("high", "low")
plot_PROBAST <- ggplot(data = data_PROBAST_plot_long,aes(x = `Rating_domain`, fill = `ROB`))+ geom_bar(aes (alpha = Rating_domain == "ROB Gesamt", y = ..count../tapply(..count.., ..x.. ,sum)[..x..]))+
  coord_flip()+
  scale_fill_manual(values = c("red3","chartreuse3"), name = "Risk of bias (ROB)", labels = legend_PROBAST_eng <- c("high", "low")) +
  scale_y_continuous(labels=scaleFUN)+
  scale_alpha_manual(values = c("TRUE" = 1, "FALSE" = 0.6), guide = F)+
  scale_x_discrete(label = labels, breaks = breaks)+
   theme(legend.position = "top")+
  xlab("")+
  ylab("Relative Accuracy in %")

# Save plot
plot_PROBAST

ggsave("plots/figure_S1_PROBAST.svg",plot_PROBAST, height = 4.5, width = 9)